In [1]:
import os
## Set directory
os.chdir('/hpc/group/pbenfeylab/CheWei/CW_data/genesys')
import networkx as nx
from genesys_evaluate_v1 import *
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import warnings
# Suppress all warning messages
warnings.filterwarnings("ignore", category=DeprecationWarning)
/hpc/group/pbenfeylab/ch416/miniconda3/envs/genesys/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html from .autonotebook import tqdm as notebook_tqdm
In [2]:
## Conda Env pytorch-gpu on DCC
print(torch.__version__)
print(sc.__version__)
1.11.0 1.9.6
In [3]:
## Genes considered/used (shared among samples)
gene_list = pd.read_csv('./gene_list_1108.csv')
Load Data¶
In [4]:
with open("./genesys_root_data.pkl", 'rb') as file_handle:
data = pickle.load(file_handle)
batch_size = 2000
dataset = Root_Dataset(data['X_test'], data['y_test'])
loader = DataLoader(dataset,
batch_size = batch_size,
shuffle = True, drop_last=True)
In [5]:
input_size = data['X_train'].shape[1]
## 10 cell types
output_size = 10
embedding_dim = 256
hidden_dim = 256
n_layers = 2
device = "cpu"
path = "./"
Load trained GeneSys model¶
In [6]:
model = ClassifierLSTM(input_size, output_size, embedding_dim, hidden_dim, n_layers).to(device)
model.load_state_dict(torch.load(path+"/workstation/genesys_model_trained_on_root_atlas_20240308_continue4.pth", map_location=torch.device('cpu')))
model = model
model.eval()
Out[6]:
ClassifierLSTM(
(fc1): Sequential(
(0): Linear(in_features=17513, out_features=256, bias=True)
(1): Dropout(p=0.2, inplace=False)
(2): GaussianNoise()
)
(fc): Sequential(
(0): ReLU()
(1): Linear(in_features=512, out_features=512, bias=True)
(2): ReLU()
(3): Linear(in_features=512, out_features=10, bias=True)
)
(lstm): LSTM(256, 256, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
(dropout): Dropout(p=0.2, inplace=False)
(b_to_z): DBlock(
(fc1): Linear(in_features=512, out_features=256, bias=True)
(fc2): Linear(in_features=512, out_features=256, bias=True)
(fc_mu): Linear(in_features=256, out_features=512, bias=True)
(fc_logsigma): Linear(in_features=256, out_features=512, bias=True)
)
(bz2_infer_z1): DBlock(
(fc1): Linear(in_features=1024, out_features=256, bias=True)
(fc2): Linear(in_features=1024, out_features=256, bias=True)
(fc_mu): Linear(in_features=256, out_features=512, bias=True)
(fc_logsigma): Linear(in_features=256, out_features=512, bias=True)
)
(z1_to_z2): DBlock(
(fc1): Linear(in_features=512, out_features=256, bias=True)
(fc2): Linear(in_features=512, out_features=256, bias=True)
(fc_mu): Linear(in_features=256, out_features=512, bias=True)
(fc_logsigma): Linear(in_features=256, out_features=512, bias=True)
)
(z_to_x): Decoder(
(fc1): Linear(in_features=512, out_features=256, bias=True)
(fc2): Linear(in_features=256, out_features=256, bias=True)
(fc3): Linear(in_features=256, out_features=17513, bias=True)
)
)
In [7]:
classes = ['Columella', 'Lateral Root Cap', 'Phloem', 'Xylem', 'Procambium', 'Pericycle', 'Endodermis', 'Cortex', 'Atrichoblast', 'Trichoblast']
class2num = {c: i for (i, c) in enumerate(classes)}
num2class = {i: c for (i, c) in enumerate(classes)}
In [8]:
cts = ['Atrichoblast','Trichoblast','Cortex','Endodermis','Pericycle','Procambium','Xylem','Phloem','Lateral Root Cap','Columella']
ctw = np.zeros((len(cts), 17513, 17513))
## number of cells sampled from the atlas
batch_size = 2000
In [9]:
## GRN for the transition t5 to t7
for ct in cts:
print(ct)
cws = np.zeros((len(loader), 17513, 17513))
with torch.no_grad():
for i, sample in enumerate(loader):
x = sample['x'].to(device)
y = sample['y'].to(device)
y_label = [num2class[i] for i in y.tolist()]
pred_h = model.init_hidden(batch_size)
tfrom = model.generate_next(x, pred_h, 4).to('cpu').detach().numpy()
cfrom = tfrom[np.where(np.array(y_label)==ct)[0],:]
pred_h = model.init_hidden(batch_size)
tto = model.generate_next(x, pred_h, 6).to('cpu').detach().numpy()
cto = tto[np.where(np.array(y_label)==ct)[0],:]
cw = torch.linalg.lstsq(torch.tensor(cfrom), torch.tensor(cto)).solution.detach().numpy()
cws[i] = cw
## Calculate mean across number of repeats
cwm = np.mean(cws, axis=0)
ctw[cts.index(ct)] = cwm
Atrichoblast Trichoblast Cortex Endodermis Pericycle Procambium Xylem Phloem Lateral Root Cap Columella
In [10]:
# Save the array to disk
np.save('genesys_ctw_t5-t7.npy', ctw)
In [11]:
ctw = np.load('genesys_ctw_t5-t7.npy')
In [12]:
## Calculate z-scores
ctw_z = np.zeros((len(cts), 17513, 17513))
for i in range(len(cts)):
ctw_z[i] = (ctw[i] - np.mean(ctw[i])) / np.std(ctw[i])
In [13]:
## Filtering based on z-scores (with no weights)
ctw_f = np.zeros((len(cts), 17513, 17513))
## z-score threshold (keep values > mean + threshold*std)
threshold=3
for i in range(len(cts)):
ctw_f[i] = np.abs(ctw_z[i]) > threshold
Load TFs list¶
In [14]:
wanted_TFs = pd.read_csv("./Kay_TF_thalemine_annotations.csv")
In [15]:
## Make TF names unique and assign preferred names
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G33880"]="WOX9"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G45160"]="SCL27"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G04410"]="NAC78"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G29035"]="ORS1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G02540"]="ZHD3"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G16500"]="IAA26"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G09740"]="HAG5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT4G24660"]="ZHD2"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G46880"]="HDG5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G28420"]="RLT1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G14580"]="BLJ"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G45260"]="BIB"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G02070"]="RVN"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G28160"]="FIT"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G68360"]="GIS3"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G20640"]="NLP4"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G05550"]="VFP5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G59470"]="FRF1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G15150"]="HAT7"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G14750"]="WER"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G75710"]="BRON"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G74500"]="TMO7"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G12646"]="RITF1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G48100"]="ARR5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT4G16141"]="GATA17L"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G65640"]="NFL"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G62700"]="VND5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT4G36160"]="VND2"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G66300"]="VND3"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G12260"]="VND4"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G62380"]="VND6"
In [16]:
pd.Series(wanted_TFs['Name']).value_counts().head(5)
Out[16]:
Name NAC001 1 PRE5 1 MYB118 1 MYB21 1 MYB0 1 Name: count, dtype: int64
Network analysis¶
In [17]:
TFidx = []
for i in wanted_TFs['GeneID']:
if i in gene_list['features'].tolist():
TFidx.append(np.where(gene_list['features']==i)[0][0])
TFidx = np.sort(np.array(TFidx))
In [19]:
def network(i):
## No weights
adj_nw = ctw_f[i]
## Weighted
adj = ctw[i]*ctw_f[i]
## TF only
adj = adj[np.ix_(TFidx,TFidx)]
adj_nw = adj_nw[np.ix_(TFidx,TFidx)]
## Remove no connect
regidx = np.sort(np.array(pd.Series(np.where(adj_nw==True)[0]).value_counts().index[pd.Series(np.where(adj_nw==True)[0]).value_counts()>=1]))
taridx = np.sort(np.array(pd.Series(np.where(adj_nw==True)[1]).value_counts().index[pd.Series(np.where(adj_nw==True)[1]).value_counts()>=1]))
## Reciprocol
keepidx = np.sort(np.array(list(set(regidx).intersection(taridx))))
#keepidx = np.sort(np.array(list(set(regidx).union(taridx))))
TFID = np.array(gene_list['features'][TFidx])[keepidx].tolist()
## TF name to keep
TFname = []
for i in np.array(gene_list['features'][TFidx])[keepidx]:
TFname.append(wanted_TFs['Name'][np.where(wanted_TFs['GeneID']==i)[0][0]])
adj = adj[np.ix_(keepidx,keepidx)]
# Create a NetworkX graph for non-directed edges
G = nx.Graph() # supports directed edges and allows for multiple edges between the same pair of nodes
# Add nodes to the graph
num_nodes = adj.shape[0]
for i, name in enumerate(TFname):
G.add_node(i, name=name)
# Add edges to the graph with weights
for i in range(num_nodes):
for j in range(num_nodes):
weight = adj[i, j]
if weight != 0:
G.add_edge(j, i, weight=abs(weight), distance=1/abs(weight))
## Measures the extent to which how close a node is to all other nodes in the network, considering the shortest paths or geodesic distances between nodes
closeness_centrality = nx.closeness_centrality(G, distance='distance')
## Measures the extent to which a node that are not only well-connected but also connected to other well-connected nodes.
eigenvector_centrality = nx.eigenvector_centrality(G)
# Create a NetworkX graph for diected edges
G = nx.MultiDiGraph() # supports directed edges and allows for multiple edges between the same pair of nodes
# Add nodes to the graph
num_nodes = adj.shape[0]
for i, name in enumerate(TFname):
G.add_node(i, name=name)
# Add edges to the graph with weights
for i in range(num_nodes):
for j in range(num_nodes):
weight = adj[i, j]
if weight != 0:
G.add_edge(j, i, weight=weight)
## Measures the number of connections (edges) each node has
degree_centrality = nx.degree_centrality(G)
# Calculate outgoing centrality
out_centrality = nx.out_degree_centrality(G)
# Calculate incoming centrality
in_centrality = nx.in_degree_centrality(G)
## Measures the extent to which a node lies on the shortest paths between other nodes.
betweenness_centrality = nx.betweenness_centrality(G, weight='weight')
## Non_Reciprocal Out centrality
# Visualize the graph
pos = nx.spring_layout(G) # Positions of the nodes
# Node colors based on weighted betweenness centrality
node_colors = [out_centrality[node] for node in G.nodes()]
# Node sizes based on weighted betweenness centrality
node_sizes = [out_centrality[node] * 1000 for node in G.nodes()]
# Get the edge weights as a dictionary
edge_weights = nx.get_edge_attributes(G, 'weight')
edge_colors = ['red' if weight > 0 else 'blue' for (_, _, weight) in G.edges(data='weight')]
# Scale the edge weights to desired linewidths
max_weight = max(edge_weights.values())
edge_widths = [float(edge_weights[edge]) / max_weight for edge in G.edges]
# Draw the graph
nx.draw(G, pos=pos, node_color=node_colors, node_size=node_sizes, with_labels=False, width=edge_widths, edge_color=edge_colors)
# Add node labels
labels = {node: G.nodes[node]['name'] for node in G.nodes}
nx.draw_networkx_labels(G, pos=pos, labels=labels, font_size=8)
# Add a colorbar to show the weighted betweenness centrality color mapping
sm = plt.cm.ScalarMappable(cmap='viridis', norm=plt.Normalize(vmin=min(node_colors), vmax=max(node_colors)))
sm.set_array([])
plt.colorbar(sm)
# Show the plot
plt.show()
dc = pd.DataFrame.from_dict(degree_centrality, orient='index', columns=['degree_centrality'])
oc = pd.DataFrame.from_dict(out_centrality, orient='index', columns=['out_centrality'])
ic = pd.DataFrame.from_dict(in_centrality, orient='index', columns=['in_centrality'])
bc = pd.DataFrame.from_dict(betweenness_centrality, orient='index', columns=['betweenness_centrality'])
cc = pd.DataFrame.from_dict(closeness_centrality, orient='index', columns=['closeness_centrality'])
ec = pd.DataFrame.from_dict(eigenvector_centrality, orient='index', columns=['eigenvector_centrality'])
df = pd.concat([dc,oc,ic,bc,cc,ec], axis=1)
df.index =TFname
df = df.sort_values('betweenness_centrality', ascending=False)
return(df)
In [20]:
atri = network(0)
In [21]:
tri = network(1)
In [22]:
cor = network(2)
In [23]:
end = network(3)
In [24]:
per = network(4)
In [25]:
pro = network(5)
In [26]:
xyl = network(6)
In [27]:
phl = network(7)
In [28]:
lrc = network(8)
In [29]:
col = network(9)
In [30]:
atri.columns = ['atri_degree_centrality','atri_out_centrality','atri_in_centrality','atri_betweenness_centrality','atri_closeness_centrality','atri_eigenvector_centrality']
tri.columns = ['tri_degree_centrality','tri_out_centrality','tri_in_centrality','tri_betweenness_centrality','tri_closeness_centrality','tri_eigenvector_centrality']
cor.columns = ['cor_degree_centrality','cor_out_centrality','cor_in_centrality','cor_betweenness_centrality','cor_closeness_centrality','cor_eigenvector_centrality']
end.columns = ['end_degree_centrality','end_out_centrality','end_in_centrality','end_betweenness_centrality','end_closeness_centrality','end_eigenvector_centrality']
per.columns = ['per_degree_centrality','per_out_centrality','per_in_centrality','per_betweenness_centrality','per_closeness_centrality','per_eigenvector_centrality']
pro.columns = ['pro_degree_centrality','pro_out_centrality','pro_in_centrality','pro_betweenness_centrality','pro_closeness_centrality','pro_eigenvector_centrality']
xyl.columns = ['xyl_degree_centrality','xyl_out_centrality','xyl_in_centrality','xyl_betweenness_centrality','xyl_closeness_centrality','xyl_eigenvector_centrality']
phl.columns = ['phl_degree_centrality','phl_out_centrality','phl_in_centrality','phl_betweenness_centrality','phl_closeness_centrality','phl_eigenvector_centrality']
lrc.columns = ['lrc_degree_centrality','lrc_out_centrality','lrc_in_centrality','lrc_betweenness_centrality','lrc_closeness_centrality','lrc_eigenvector_centrality']
col.columns = ['col_degree_centrality','col_out_centrality','col_in_centrality','col_betweenness_centrality','col_closeness_centrality','col_eigenvector_centrality']
In [44]:
## Indentify main regulators in each net work
tff = []
tff = tff + atri[atri['atri_betweenness_centrality']>0].index.tolist()
tff = tff + tri[tri['tri_betweenness_centrality']>0].index.tolist()
tff = tff + lrc[lrc['lrc_betweenness_centrality']>0].index.tolist()
tff = tff + cor[cor['cor_betweenness_centrality']>0].index.tolist()
tff = tff + end[end['end_betweenness_centrality']>0].index.tolist()
tff = tff + per[per['per_betweenness_centrality']>0].index.tolist()
tff = tff + pro[pro['pro_betweenness_centrality']>0].index.tolist()
tff = tff + xyl[xyl['xyl_betweenness_centrality']>0].index.tolist()
tff = tff + phl[phl['phl_betweenness_centrality']>0].index.tolist()
tff = tff + col[col['col_betweenness_centrality']>0].index.tolist()
tf_occurance = pd.DataFrame(pd.Series(tff).value_counts())
tf_occurance = tf_occurance.rename(columns={
'count': 'tf_occurance'
})
tf_spec = pd.concat([tf_occurance, atri, tri, lrc, cor, end, per, pro, xyl, phl, col], axis=1)
tf_spec = tf_spec.fillna(0)
In [45]:
## Epidermis (atri, tri, lrc)
celltype1='atri'
celltype2='tri'
celltype3='lrc'
ts = tf_spec[tf_spec['tf_occurance']==3][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype3+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype3+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality', celltype3+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==9].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[45]:
| atri_betweenness_centrality | tri_betweenness_centrality | lrc_betweenness_centrality | atri_out_centrality | tri_out_centrality | lrc_out_centrality | atri_in_centrality | tri_in_centrality | lrc_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| ARR6 | 0.747284 | 0.547855 | 0.000019 | 0.495413 | 0.146479 | 0.054432 | 0.204893 | 0.050704 | 0.043546 | 9 | 11.290626 |
In [46]:
## atri, tri
celltype1='atri'
celltype2='tri'
ts = tf_spec[tf_spec['tf_occurance']==2][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==6].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[46]:
| atri_betweenness_centrality | tri_betweenness_centrality | atri_out_centrality | tri_out_centrality | atri_in_centrality | tri_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|---|---|---|
| ARR5 | 0.886747 | 0.826697 | 0.513761 | 0.366197 | 0.351682 | 0.115493 | 6 | 9.060577 |
| LRL3 | 0.003058 | 0.897422 | 0.003058 | 0.909859 | 0.189602 | 0.242254 | 6 | 8.245253 |
| AT3G05860 | 0.960188 | 0.373582 | 0.278287 | 0.047887 | 0.321101 | 0.098592 | 6 | 8.079637 |
| WRKY61 | 0.007120 | 0.453028 | 0.370031 | 0.388732 | 0.103976 | 0.036620 | 6 | 7.359506 |
| AT2G37120 | 0.245933 | 0.174799 | 0.030581 | 0.295775 | 0.510703 | 0.090141 | 6 | 7.347932 |
| HB17 | 0.754826 | 0.000517 | 0.314985 | 0.028169 | 0.085627 | 0.025352 | 6 | 7.209476 |
| ZFHD1 | 0.023245 | 0.000517 | 0.055046 | 0.016901 | 0.440367 | 0.121127 | 6 | 6.657204 |
In [47]:
## Atrichoblast specific
celltype = 'atri'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[47]:
| atri_betweenness_centrality | atri_out_centrality | atri_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| MC2 | 0.929176 | 0.131498 | 0.152905 | 3 | 4.213579 |
| TTG2 | 0.906371 | 0.235474 | 0.189602 | 3 | 4.331448 |
| NAC6 | 0.899280 | 0.406728 | 0.159021 | 3 | 4.465029 |
| HB24 | 0.895330 | 0.192661 | 0.107034 | 3 | 4.195024 |
| GL2 | 0.760577 | 0.149847 | 0.226300 | 3 | 4.136724 |
| RMR1 | 0.522701 | 0.180428 | 0.015291 | 3 | 3.718420 |
| PAT1 | 0.380199 | 0.021407 | 0.006116 | 3 | 3.407722 |
| AT3G13840 | 0.364036 | 0.015291 | 0.137615 | 3 | 3.516942 |
| AT2G28710 | 0.352142 | 0.382263 | 0.137615 | 3 | 3.872019 |
| OFP18 | 0.280483 | 0.125382 | 0.244648 | 3 | 3.650513 |
| HB30 | 0.202370 | 0.027523 | 0.079511 | 3 | 3.309403 |
| ARF17 | 0.127924 | 0.033639 | 0.039755 | 3 | 3.201319 |
| TGA3 | 0.041904 | 0.051988 | 0.021407 | 3 | 3.115298 |
| AIP2 | 0.030938 | 0.296636 | 0.061162 | 3 | 3.388736 |
| AT5G22890 | 0.026679 | 0.275229 | 0.033639 | 3 | 3.335547 |
| KAN | 0.015178 | 0.137615 | 0.079511 | 3 | 3.232303 |
| PHE1 | 0.010797 | 0.051988 | 0.003058 | 3 | 3.065843 |
| AT5G58900 | 0.010638 | 0.113150 | 0.024465 | 3 | 3.148252 |
| GATA17 | 0.008264 | 0.021407 | 0.048930 | 3 | 3.078601 |
| FIT | 0.006107 | 0.082569 | 0.217125 | 3 | 3.305801 |
| AT2G18670 | 0.002730 | 0.125382 | 0.079511 | 3 | 3.207623 |
| AT1G21580 | 0.000535 | 0.103976 | 0.030581 | 3 | 3.135091 |
| NLP7 | 0.000478 | 0.067278 | 0.051988 | 3 | 3.119744 |
| BZO2H3 | 0.000394 | 0.085627 | 0.082569 | 3 | 3.168590 |
| MBD1 | 0.000328 | 0.073394 | 0.036697 | 3 | 3.110420 |
| HSFB3 | 0.000188 | 0.088685 | 0.030581 | 3 | 3.119454 |
| AT1G25550 | 0.000019 | 0.100917 | 0.042813 | 3 | 3.143750 |
| NLP4 | 0.000009 | 0.100917 | 0.012232 | 3 | 3.113159 |
| AT4G22820 | 0.000009 | 0.061162 | 0.018349 | 3 | 3.079520 |
| WRKY47 | 0.000009 | 0.146789 | 0.036697 | 3 | 3.183496 |
| KNAT5 | 0.000009 | 0.085627 | 0.042813 | 3 | 3.128450 |
In [48]:
## Trichoblast specific
celltype = 'tri'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[48]:
| tri_betweenness_centrality | tri_out_centrality | tri_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| AT4G09100 | 0.936047 | 0.828169 | 0.377465 | 3 | 5.141681 |
| AT3G53370 | 0.921238 | 0.526761 | 0.121127 | 3 | 4.569125 |
| RSL4 | 0.878292 | 0.535211 | 0.067606 | 3 | 4.481109 |
| AT5G06800 | 0.863786 | 0.259155 | 0.149296 | 3 | 4.272237 |
| RSL2 | 0.821302 | 0.614085 | 0.078873 | 3 | 4.514260 |
| RHD6 | 0.424899 | 0.645070 | 0.076056 | 3 | 4.146025 |
| AT5G56200 | 0.396873 | 0.095775 | 0.016901 | 3 | 3.509549 |
| OFP13 | 0.390563 | 0.005634 | 0.070423 | 3 | 3.466619 |
| RAP2.11 | 0.007862 | 0.132394 | 0.014085 | 3 | 3.154341 |
| EIL2 | 0.002101 | 0.008451 | 0.036620 | 3 | 3.047171 |
| AT5G65130 | 0.000358 | 0.005634 | 0.019718 | 3 | 3.025710 |
| AT2G05160 | 0.000215 | 0.292958 | 0.019718 | 3 | 3.312891 |
| AT4G39160 | 0.000024 | 0.214085 | 0.016901 | 3 | 3.231010 |
| AT2G20030 | 0.000024 | 0.019718 | 0.030986 | 3 | 3.050728 |
| HB16 | 0.000008 | 0.056338 | 0.205634 | 3 | 3.261980 |
| AT4G01350 | 0.000008 | 0.005634 | 0.028169 | 3 | 3.033811 |
In [49]:
## LRC specific
celltype = 'lrc'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[49]:
| lrc_betweenness_centrality | lrc_out_centrality | lrc_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| CRF2 | 0.982212 | 0.113530 | 0.088647 | 3 | 4.184389 |
| GATA2 | 0.946776 | 0.594090 | 0.553655 | 3 | 5.094521 |
| WRKY17 | 0.940200 | 0.261275 | 0.367030 | 3 | 4.568504 |
| WER | 0.927465 | 0.152411 | 0.281493 | 3 | 4.361368 |
| ERF9 | 0.852882 | 0.026439 | 0.085537 | 3 | 3.964858 |
| NAC016 | 0.843515 | 0.060653 | 0.143079 | 3 | 4.047247 |
| PLT1 | 0.841465 | 0.186625 | 0.062208 | 3 | 4.090299 |
| OFP6 | 0.833140 | 0.506998 | 0.188180 | 3 | 4.528318 |
| AT1G74840 | 0.662716 | 0.202177 | 0.090202 | 3 | 3.955095 |
| HMGB4 | 0.492863 | 0.032659 | 0.063764 | 3 | 3.589286 |
| BZIP34 | 0.424759 | 0.172628 | 0.063764 | 3 | 3.661151 |
| LBD4 | 0.307592 | 0.040435 | 0.111975 | 3 | 3.460003 |
| AT1G69030 | 0.307578 | 0.195956 | 0.079316 | 3 | 3.582850 |
| RBR1 | 0.170933 | 0.076205 | 0.041991 | 3 | 3.289129 |
| NAC060 | 0.067661 | 0.083981 | 0.035770 | 3 | 3.187412 |
| AT5G18090 | 0.066297 | 0.110420 | 0.063764 | 3 | 3.240481 |
| ATS | 0.066007 | 0.267496 | 0.043546 | 3 | 3.377049 |
| PRR7 | 0.034161 | 0.073095 | 0.027994 | 3 | 3.135250 |
| 3xHMG-box2 | 0.003110 | 0.004666 | 0.309487 | 3 | 3.317263 |
| BRM | 0.002919 | 0.020218 | 0.043546 | 3 | 3.066683 |
| CHR38 | 0.001359 | 0.012442 | 0.181960 | 3 | 3.195760 |
| ZF1 | 0.000732 | 0.082426 | 0.021773 | 3 | 3.104931 |
| AT3G52250 | 0.000664 | 0.031104 | 0.017107 | 3 | 3.048875 |
| GRF3 | 0.000497 | 0.087092 | 0.013997 | 3 | 3.101585 |
| AT1G11950 | 0.000177 | 0.029549 | 0.026439 | 3 | 3.056164 |
| RR10 | 0.000114 | 0.119751 | 0.032659 | 3 | 3.152524 |
| GRF2 | 0.000034 | 0.149300 | 0.009331 | 3 | 3.158665 |
| BIM2 | 0.000019 | 0.052877 | 0.055988 | 3 | 3.108884 |
| BNQ3 | 0.000007 | 0.200622 | 0.009331 | 3 | 3.209961 |
| RITF1 | 0.000005 | 0.055988 | 0.021773 | 3 | 3.077765 |
| AGL94 | 0.000002 | 0.045101 | 0.026439 | 3 | 3.071542 |
| COL3 | 0.000002 | 0.031104 | 0.060653 | 3 | 3.091760 |
In [50]:
## Columella specific
celltype = 'col'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[50]:
| col_betweenness_centrality | col_out_centrality | col_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| AT3G52440 | 0.930689 | 0.277778 | 0.107937 | 3 | 4.316403 |
| RR1 | 0.905741 | 0.060317 | 0.109524 | 3 | 4.075582 |
| TRB2 | 0.886169 | 0.050794 | 0.111111 | 3 | 4.048073 |
| NTT | 0.871207 | 0.233333 | 0.090476 | 3 | 4.195016 |
| NAM | 0.861640 | 0.169841 | 0.080952 | 3 | 4.112433 |
| AT3G25790 | 0.847483 | 0.153968 | 0.079365 | 3 | 4.080816 |
| TLP9 | 0.826371 | 0.085714 | 0.123810 | 3 | 4.035895 |
| AT2G22200 | 0.782434 | 0.049206 | 0.076190 | 3 | 3.907831 |
| SNI1 | 0.722674 | 0.298413 | 0.157143 | 3 | 4.178229 |
| MBF1B | 0.518089 | 0.049206 | 0.068254 | 3 | 3.635549 |
| IAA20 | 0.438956 | 0.482540 | 0.455556 | 3 | 4.377051 |
| AT3G08505 | 0.112701 | 0.063492 | 0.063492 | 3 | 3.239685 |
| BEH2 | 0.056896 | 0.026984 | 0.096825 | 3 | 3.180705 |
| GATA5 | 0.053625 | 0.071429 | 0.042857 | 3 | 3.167911 |
| AT5G16680 | 0.042769 | 0.133333 | 0.074603 | 3 | 3.250705 |
| AT5G65910 | 0.039905 | 0.036508 | 0.050794 | 3 | 3.127206 |
| HB23 | 0.035486 | 0.020635 | 0.046032 | 3 | 3.102153 |
| MYC3 | 0.034244 | 0.014286 | 0.023810 | 3 | 3.072340 |
| ARF10 | 0.025992 | 0.153968 | 0.150794 | 3 | 3.330754 |
| BZIP25 | 0.016731 | 0.171429 | 0.101587 | 3 | 3.289747 |
| APRR8 | 0.016146 | 0.039683 | 0.033333 | 3 | 3.089161 |
| AT4G13040 | 0.012706 | 0.042857 | 0.052381 | 3 | 3.107944 |
| FRS8 | 0.011295 | 0.004762 | 0.036508 | 3 | 3.052565 |
| SMZ | 0.010044 | 0.046032 | 0.004762 | 3 | 3.060837 |
| EMB2773 | 0.008643 | 0.007937 | 0.047619 | 3 | 3.064199 |
| STOP1 | 0.006970 | 0.084127 | 0.088889 | 3 | 3.179986 |
| NTM1 | 0.005771 | 0.100000 | 0.076190 | 3 | 3.181962 |
| NFL | 0.005567 | 0.026984 | 0.044444 | 3 | 3.076995 |
| JMJ18 | 0.003250 | 0.030159 | 0.066667 | 3 | 3.100076 |
| AT5G23405 | 0.001693 | 0.138095 | 0.039683 | 3 | 3.179471 |
| GAI | 0.001380 | 0.019048 | 0.049206 | 3 | 3.069634 |
| TRP1 | 0.001297 | 0.031746 | 0.103175 | 3 | 3.136218 |
| AT5G12400 | 0.000510 | 0.042857 | 0.036508 | 3 | 3.079875 |
| CHR17 | 0.000404 | 0.028571 | 0.036508 | 3 | 3.065483 |
| BBX30 | 0.000338 | 0.019048 | 0.017460 | 3 | 3.036846 |
| AT2G33550 | 0.000288 | 0.014286 | 0.019048 | 3 | 3.033621 |
| SPL14 | 0.000151 | 0.039683 | 0.095238 | 3 | 3.135072 |
| IAA10 | 0.000098 | 0.034921 | 0.015873 | 3 | 3.050892 |
| MBD9 | 0.000078 | 0.038095 | 0.015873 | 3 | 3.054046 |
| AT3G05670 | 0.000043 | 0.026984 | 0.049206 | 3 | 3.076233 |
| LUG | 0.000030 | 0.047619 | 0.041270 | 3 | 3.088919 |
| GBF3 | 0.000025 | 0.017460 | 0.073016 | 3 | 3.090501 |
| TGA4 | 0.000023 | 0.033333 | 0.107937 | 3 | 3.141293 |
| CHR11 | 0.000023 | 0.068254 | 0.044444 | 3 | 3.112721 |
| EIN3 | 0.000020 | 0.065079 | 0.066667 | 3 | 3.131766 |
| DRIP2 | 0.000020 | 0.058730 | 0.025397 | 3 | 3.084147 |
| AT2G44430 | 0.000015 | 0.038095 | 0.053968 | 3 | 3.092079 |
| E2F1 | 0.000015 | 0.038095 | 0.014286 | 3 | 3.052396 |
| AGL80 | 0.000010 | 0.019048 | 0.025397 | 3 | 3.044455 |
| CCA1 | 0.000010 | 0.031746 | 0.044444 | 3 | 3.076201 |
| PC-MYB1 | 0.000003 | 0.038095 | 0.063492 | 3 | 3.101590 |
In [51]:
## Ground tissue
celltype1='cor'
celltype2='end'
ts = tf_spec[tf_spec['tf_occurance']==2][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==6].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[51]:
| cor_betweenness_centrality | end_betweenness_centrality | cor_out_centrality | end_out_centrality | cor_in_centrality | end_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|---|---|---|
| AT1G05710 | 0.561959 | 0.011934 | 0.419872 | 0.346614 | 0.089744 | 0.155378 | 6 | 7.585501 |
| LAF1 | 0.097803 | 0.000040 | 0.794872 | 0.192231 | 0.080128 | 0.123506 | 6 | 7.288580 |
| JKD | 0.003896 | 0.017005 | 0.282051 | 0.130478 | 0.266026 | 0.242032 | 6 | 6.941487 |
| MYB122 | 0.008595 | 0.059323 | 0.038462 | 0.288845 | 0.035256 | 0.075697 | 6 | 6.506178 |
| AT4G28030 | 0.001226 | 0.000986 | 0.237179 | 0.039841 | 0.157051 | 0.019920 | 6 | 6.456204 |
| COL4 | 0.000144 | 0.000014 | 0.137821 | 0.057769 | 0.060897 | 0.062749 | 6 | 6.319394 |
| ZFN1 | 0.000010 | 0.000007 | 0.128205 | 0.037849 | 0.099359 | 0.049801 | 6 | 6.315231 |
In [52]:
## Cortex specific
celltype = 'cor'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[52]:
| cor_betweenness_centrality | cor_out_centrality | cor_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| SIGF | 0.969618 | 0.083333 | 0.205128 | 3 | 4.258080 |
| HAM3 | 0.680219 | 0.121795 | 0.214744 | 3 | 4.016757 |
| LRP1 | 0.459302 | 0.214744 | 0.195513 | 3 | 3.869558 |
| tny | 0.424819 | 0.589744 | 0.083333 | 3 | 4.097896 |
| BZS1 | 0.393705 | 0.230769 | 0.096154 | 3 | 3.720628 |
| AT2G42660 | 0.331334 | 0.182692 | 0.317308 | 3 | 3.831334 |
| EIL1 | 0.197533 | 0.076923 | 0.211538 | 3 | 3.485994 |
| RGL3 | 0.144293 | 0.246795 | 0.221154 | 3 | 3.612241 |
| AT2G46810 | 0.091217 | 0.003205 | 0.083333 | 3 | 3.177756 |
| AT1G72210 | 0.085673 | 0.217949 | 0.301282 | 3 | 3.604904 |
| HMG | 0.052045 | 0.064103 | 0.035256 | 3 | 3.151404 |
| SCL27 | 0.015850 | 0.022436 | 0.035256 | 3 | 3.073543 |
| HK2 | 0.008286 | 0.102564 | 0.022436 | 3 | 3.133286 |
| AT2G38300 | 0.006142 | 0.394231 | 0.217949 | 3 | 3.618322 |
| JAZ6 | 0.006070 | 0.243590 | 0.125000 | 3 | 3.374660 |
| GLK2 | 0.003710 | 0.269231 | 0.067308 | 3 | 3.340249 |
| IDD4 | 0.003236 | 0.153846 | 0.051282 | 3 | 3.208364 |
| WRKY69 | 0.001577 | 0.272436 | 0.173077 | 3 | 3.447090 |
| RR3 | 0.000917 | 0.019231 | 0.035256 | 3 | 3.055404 |
| WRKY13 | 0.000330 | 0.019231 | 0.006410 | 3 | 3.025971 |
| ETR2 | 0.000165 | 0.051282 | 0.096154 | 3 | 3.147601 |
| AT3G61180 | 0.000113 | 0.044872 | 0.022436 | 3 | 3.067421 |
| IDD7 | 0.000041 | 0.051282 | 0.019231 | 3 | 3.070554 |
| AT1G68070 | 0.000041 | 0.108974 | 0.051282 | 3 | 3.160298 |
| AT2G44410 | 0.000041 | 0.012821 | 0.032051 | 3 | 3.044913 |
| AGL67 | 0.000021 | 0.009615 | 0.019231 | 3 | 3.028867 |
| ULT1 | 0.000021 | 0.035256 | 0.179487 | 3 | 3.214764 |
In [53]:
## Endodermis specific
celltype = 'end'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[53]:
| end_betweenness_centrality | end_out_centrality | end_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| MYB68 | 7.995992e-01 | 0.703187 | 0.425299 | 3 | 4.928085 |
| MYB36 | 2.451639e-01 | 0.967131 | 1.000996 | 3 | 5.213291 |
| MYB74 | 1.183968e-01 | 0.980080 | 0.934263 | 3 | 5.032739 |
| BLJ | 7.974682e-02 | 0.302789 | 0.343625 | 3 | 3.726161 |
| chr31 | 1.286082e-02 | 0.158367 | 0.138446 | 3 | 3.309674 |
| AGL42 | 1.105548e-02 | 0.174303 | 0.132470 | 3 | 3.317828 |
| WLIM2b | 5.602714e-03 | 0.073705 | 0.087649 | 3 | 3.166957 |
| bZIP58 | 3.717930e-03 | 0.176295 | 0.029880 | 3 | 3.209893 |
| LRL2 | 1.992032e-03 | 0.071713 | 0.049801 | 3 | 3.123506 |
| SCR | 1.579922e-03 | 0.085657 | 0.123506 | 3 | 3.210743 |
| AGL102 | 7.725827e-04 | 0.054781 | 0.009960 | 3 | 3.065514 |
| AT4G36860 | 6.861884e-04 | 0.095618 | 0.085657 | 3 | 3.181961 |
| JAZ12 | 6.494461e-04 | 0.036853 | 0.085657 | 3 | 3.123159 |
| ABF3 | 2.045656e-04 | 0.018924 | 0.042829 | 3 | 3.061958 |
| BZIP17 | 1.568998e-04 | 0.014940 | 0.023904 | 3 | 3.039002 |
| AT5G58620 | 1.320739e-04 | 0.116534 | 0.051793 | 3 | 3.168459 |
| AT2G47850 | 3.872844e-05 | 0.005976 | 0.013944 | 3 | 3.019959 |
| AT2G27580 | 3.674236e-05 | 0.027888 | 0.027888 | 3 | 3.055814 |
| AT5G51790 | 3.177718e-05 | 0.004980 | 0.000996 | 3 | 3.006008 |
| SAP7 | 7.944295e-06 | 0.035857 | 0.048805 | 3 | 3.084669 |
| BIB | 6.951258e-06 | 0.067729 | 0.060757 | 3 | 3.128493 |
| AT2G03470 | 6.951258e-06 | 0.027888 | 0.045817 | 3 | 3.073712 |
| MYB32 | 5.958221e-06 | 0.041833 | 0.050797 | 3 | 3.092635 |
| ING1 | 1.986074e-06 | 0.034861 | 0.022908 | 3 | 3.057771 |
| AT3G18870 | 1.986074e-06 | 0.024900 | 0.013944 | 3 | 3.038847 |
| ALY3 | 9.930368e-07 | 0.028884 | 0.029880 | 3 | 3.058766 |
| VIP1 | 9.930368e-07 | 0.042829 | 0.036853 | 3 | 3.079682 |
| AGL16 | 9.930368e-07 | 0.036853 | 0.035857 | 3 | 3.072710 |
In [54]:
## Stele
celltype1='per'
celltype2='pro'
celltype3='xyl'
celltype4='phl'
ts = tf_spec[tf_spec['tf_occurance']==4][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype3+'_betweenness_centrality', celltype4+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype3+'_out_centrality', celltype4+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality', celltype3+'_in_centrality', celltype4+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==12].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[54]:
| per_betweenness_centrality | pro_betweenness_centrality | xyl_betweenness_centrality | phl_betweenness_centrality | per_out_centrality | pro_out_centrality | xyl_out_centrality | phl_out_centrality | per_in_centrality | pro_in_centrality | xyl_in_centrality | phl_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| AT3G43430 | 0.856656 | 0.975513 | 0.737149 | 0.963097 | 0.733449 | 0.325062 | 0.108889 | 0.257062 | 0.419861 | 0.454094 | 0.182222 | 0.079096 | 12 | 18.092152 |
In [55]:
## Pericycle
celltype = 'per'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[55]:
| per_betweenness_centrality | per_out_centrality | per_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| HDA3 | 0.980566 | 0.017422 | 0.212544 | 3 | 4.210531 |
| MGP | 0.946777 | 0.106272 | 0.170732 | 3 | 4.223781 |
| NUC | 0.929833 | 0.271777 | 0.156794 | 3 | 4.358405 |
| IDD14 | 0.881995 | 0.162021 | 0.083624 | 3 | 4.127640 |
| GAMMA-H2AX | 0.877064 | 0.026132 | 0.186411 | 3 | 4.089607 |
| GATA15 | 0.852497 | 0.045296 | 0.069686 | 3 | 3.967480 |
| AT1G26790 | 0.809673 | 0.114983 | 0.033101 | 3 | 3.957756 |
| AT3G21330 | 0.670601 | 0.017422 | 0.121951 | 3 | 3.809974 |
| NAC2 | 0.650452 | 0.179443 | 0.026132 | 3 | 3.856027 |
| LBD14 | 0.646205 | 0.059233 | 0.153310 | 3 | 3.858748 |
| KAN2 | 0.510289 | 0.071429 | 0.047038 | 3 | 3.628756 |
| ATL5 | 0.450240 | 0.186411 | 0.029617 | 3 | 3.666268 |
| AT4G30180 | 0.406662 | 0.024390 | 0.029617 | 3 | 3.460669 |
| AT2G20100 | 0.340311 | 0.026132 | 0.024390 | 3 | 3.390834 |
| LBD39 | 0.284823 | 0.043554 | 0.059233 | 3 | 3.387611 |
| IDD11 | 0.256985 | 0.095819 | 0.033101 | 3 | 3.385905 |
| OFP1 | 0.188132 | 0.050523 | 0.022648 | 3 | 3.261303 |
| SOG1 | 0.163413 | 0.216028 | 0.238676 | 3 | 3.618117 |
| MYB34 | 0.134289 | 0.205575 | 0.132404 | 3 | 3.472268 |
| ZFP7 | 0.133356 | 0.043554 | 0.019164 | 3 | 3.196074 |
| ERF3 | 0.071818 | 0.024390 | 0.076655 | 3 | 3.172863 |
| AT1G04850 | 0.058978 | 0.017422 | 0.174216 | 3 | 3.250616 |
| AT2G39020 | 0.047239 | 0.005226 | 0.020906 | 3 | 3.073371 |
| AT2G14880 | 0.028793 | 0.022648 | 0.193380 | 3 | 3.244821 |
| ARIA | 0.022660 | 0.027875 | 0.080139 | 3 | 3.130674 |
| NF-YA3 | 0.019346 | 0.038328 | 0.041812 | 3 | 3.099486 |
| ERF7 | 0.008376 | 0.015679 | 0.087108 | 3 | 3.111164 |
| LBD38 | 0.007680 | 0.270035 | 0.040070 | 3 | 3.317785 |
| TLP1 | 0.006178 | 0.012195 | 0.078397 | 3 | 3.096770 |
| ERF12 | 0.004254 | 0.224739 | 0.217770 | 3 | 3.446762 |
| AT3G03590 | 0.003694 | 0.005226 | 0.048780 | 3 | 3.057701 |
| AL5 | 0.003110 | 0.020906 | 0.034843 | 3 | 3.058859 |
| ATWHY2 | 0.003086 | 0.029617 | 0.094077 | 3 | 3.126779 |
| RAP2.2 | 0.001289 | 0.080139 | 0.081882 | 3 | 3.163310 |
| BBX29 | 0.001076 | 0.017422 | 0.015679 | 3 | 3.034177 |
| IDD16 | 0.000815 | 0.054007 | 0.013937 | 3 | 3.068759 |
| HB21 | 0.000660 | 0.346690 | 0.045296 | 3 | 3.392646 |
| AT4G17900 | 0.000490 | 0.104530 | 0.236934 | 3 | 3.341953 |
| NF-YA2 | 0.000316 | 0.015679 | 0.013937 | 3 | 3.029933 |
| MBD13 | 0.000109 | 0.003484 | 0.013937 | 3 | 3.017531 |
| NST1 | 0.000091 | 0.019164 | 0.012195 | 3 | 3.031450 |
| AT2G42040 | 0.000085 | 0.059233 | 0.116725 | 3 | 3.176043 |
| WRKY21 | 0.000064 | 0.081882 | 0.118467 | 3 | 3.200412 |
| MBD5 | 0.000052 | 0.001742 | 0.057491 | 3 | 3.059285 |
| GATA16 | 0.000049 | 0.045296 | 0.010453 | 3 | 3.055798 |
| PRT1 | 0.000043 | 0.128920 | 0.141115 | 3 | 3.270077 |
| NF-YB8 | 0.000009 | 0.022648 | 0.036585 | 3 | 3.059243 |
| SHR | 0.000006 | 0.055749 | 0.148084 | 3 | 3.203839 |
| MYB65 | 0.000006 | 0.024390 | 0.031359 | 3 | 3.055755 |
| CDF2 | 0.000003 | 0.078397 | 0.055749 | 3 | 3.134149 |
In [56]:
## Procambium
celltype = 'pro'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[56]:
| pro_betweenness_centrality | pro_out_centrality | pro_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| HB18 | 0.924058 | 0.089330 | 0.004963 | 3 | 4.018351 |
| AT1G51200 | 0.888708 | 0.091811 | 0.188586 | 3 | 4.169105 |
| MYB6 | 0.303168 | 0.049628 | 0.022333 | 3 | 3.375128 |
| AT4G17780 | 0.086818 | 0.002481 | 0.029777 | 3 | 3.119076 |
| IAA9 | 0.053017 | 0.578164 | 0.421836 | 3 | 4.053017 |
| AT1G75490 | 0.006487 | 0.007444 | 0.009926 | 3 | 3.023857 |
| HAT9 | 0.002913 | 0.002481 | 0.022333 | 3 | 3.027727 |
| STO | 0.002481 | 0.183623 | 0.124069 | 3 | 3.310174 |
| TAFII15 | 0.002284 | 0.027295 | 0.062035 | 3 | 3.091614 |
| AT2G40200 | 0.001228 | 0.042184 | 0.027295 | 3 | 3.070707 |
| GRP2 | 0.000031 | 0.099256 | 0.186104 | 3 | 3.285391 |
| SPL1 | 0.000012 | 0.066998 | 0.114144 | 3 | 3.181154 |
| AT1G19000 | 0.000006 | 0.037221 | 0.074442 | 3 | 3.111669 |
In [57]:
## Xylem
celltype = 'xyl'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[57]:
| xyl_betweenness_centrality | xyl_out_centrality | xyl_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| AT1G68200 | 0.987355 | 0.522222 | 0.284444 | 3 | 4.794021 |
| VND6 | 0.982910 | 0.340000 | 0.146667 | 3 | 4.469577 |
| MYB85 | 0.978411 | 0.202222 | 0.233333 | 3 | 4.413967 |
| LBD31 | 0.969220 | 0.297778 | 0.228889 | 3 | 4.495887 |
| MYB83 | 0.967330 | 0.768889 | 0.233333 | 3 | 4.969552 |
| VND7 | 0.953784 | 0.742222 | 0.153333 | 3 | 4.849339 |
| MYB99 | 0.953571 | 0.208889 | 0.164444 | 3 | 4.326904 |
| VND1 | 0.947642 | 0.344444 | 0.131111 | 3 | 4.423197 |
| AT4G16610 | 0.941232 | 0.153333 | 0.126667 | 3 | 4.221232 |
| FBH1 | 0.941153 | 0.191111 | 0.091111 | 3 | 4.223375 |
| AT3G10470 | 0.940470 | 0.095556 | 0.097778 | 3 | 4.133804 |
| HB31 | 0.932591 | 0.293333 | 0.235556 | 3 | 4.461480 |
| XND1 | 0.893719 | 0.404444 | 0.168889 | 3 | 4.467053 |
| AT3G22560 | 0.879713 | 0.044444 | 0.097778 | 3 | 4.021935 |
| VND5 | 0.856936 | 0.657778 | 0.046667 | 3 | 4.561381 |
| MYB46 | 0.855734 | 0.864444 | 0.468889 | 3 | 5.189067 |
| IAA6 | 0.855214 | 0.524444 | 0.060000 | 3 | 4.439659 |
| VND4 | 0.728755 | 0.751111 | 0.295556 | 3 | 4.775422 |
| MYB52 | 0.671829 | 0.288889 | 0.177778 | 3 | 4.138495 |
| AT2G04845 | 0.656407 | 0.115556 | 0.155556 | 3 | 3.927518 |
| ZHD3 | 0.631235 | 0.457778 | 0.291111 | 3 | 4.380124 |
| VND3 | 0.618471 | 0.717778 | 0.277778 | 3 | 4.614026 |
| ARR9 | 0.602460 | 0.017778 | 0.028889 | 3 | 3.649126 |
| AT3G22100 | 0.599668 | 0.028889 | 0.037778 | 3 | 3.666335 |
| AT1G66810 | 0.342371 | 0.600000 | 0.377778 | 3 | 4.320148 |
| OFP10 | 0.217961 | 0.168889 | 0.011111 | 3 | 3.397961 |
| TCP20 | 0.188721 | 0.055556 | 0.251111 | 3 | 3.495387 |
| VND2 | 0.157496 | 0.844444 | 0.457778 | 3 | 4.459718 |
| MMD1 | 0.114437 | 0.026667 | 0.002222 | 3 | 3.143326 |
| SHP1 | 0.091725 | 0.160000 | 0.046667 | 3 | 3.298391 |
| AT5G25470 | 0.060802 | 0.013333 | 0.020000 | 3 | 3.094135 |
| AT1G26590 | 0.050012 | 0.026667 | 0.053333 | 3 | 3.130012 |
| BZIP49 | 0.049567 | 0.115556 | 0.086667 | 3 | 3.251789 |
| ABF4 | 0.033224 | 0.042222 | 0.015556 | 3 | 3.091002 |
| AT3G19080 | 0.031081 | 0.042222 | 0.006667 | 3 | 3.079970 |
| SHY2 | 0.015575 | 0.006667 | 0.028889 | 3 | 3.051131 |
| HB34 | 0.014843 | 0.131111 | 0.053333 | 3 | 3.199287 |
| ASL9 | 0.005009 | 0.077778 | 0.040000 | 3 | 3.122786 |
| TCP10 | 0.004425 | 0.002222 | 0.013333 | 3 | 3.019980 |
| PLIM2b | 0.002514 | 0.173333 | 0.186667 | 3 | 3.362514 |
| GIF3 | 0.002301 | 0.168889 | 0.071111 | 3 | 3.242301 |
| MYB25 | 0.001930 | 0.062222 | 0.086667 | 3 | 3.150819 |
| AT3G10760 | 0.001915 | 0.108889 | 0.084444 | 3 | 3.195249 |
| AT5G04390 | 0.000678 | 0.033333 | 0.026667 | 3 | 3.060678 |
| AT5G46910 | 0.000129 | 0.186667 | 0.100000 | 3 | 3.286795 |
| AP3 | 0.000084 | 0.091111 | 0.100000 | 3 | 3.191195 |
| GATA1 | 0.000020 | 0.008889 | 0.020000 | 3 | 3.028909 |
In [58]:
## Phloem
celltype = 'phl'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[58]:
| phl_betweenness_centrality | phl_out_centrality | phl_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| AT4G37180 | 0.969452 | 0.511299 | 0.122881 | 3 | 4.603633 |
| HCA2 | 0.799051 | 0.230226 | 0.183616 | 3 | 4.212893 |
| DOF2.4 | 0.741457 | 0.129944 | 0.121469 | 3 | 3.992870 |
| APL | 0.642254 | 0.985876 | 1.001412 | 3 | 5.629542 |
| AT5G02460 | 0.545010 | 0.028249 | 0.170904 | 3 | 3.744162 |
| AT5G41380 | 0.372484 | 0.557910 | 0.060734 | 3 | 3.991128 |
| NAC057 | 0.328938 | 0.331921 | 0.166667 | 3 | 3.827526 |
| BHLH101 | 0.285558 | 0.045198 | 0.052260 | 3 | 3.383016 |
| AS1 | 0.271196 | 0.019774 | 0.038136 | 3 | 3.329106 |
| AT3G12730 | 0.188177 | 0.844633 | 0.604520 | 3 | 4.637329 |
| NF-YB3 | 0.177087 | 0.001412 | 0.011299 | 3 | 3.189799 |
| AT2G03500 | 0.105529 | 0.615819 | 0.560734 | 3 | 4.282082 |
| VOZ1 | 0.054274 | 0.175141 | 0.001412 | 3 | 3.230827 |
| AT1G72010 | 0.047417 | 0.029661 | 0.073446 | 3 | 3.150525 |
| AT2G31370 | 0.033317 | 0.032486 | 0.066384 | 3 | 3.132187 |
| CRF10 | 0.004877 | 0.001412 | 0.005650 | 3 | 3.011939 |
| NAC020 | 0.004237 | 0.079096 | 0.153955 | 3 | 3.237288 |
| AT5G09240 | 0.002977 | 0.014124 | 0.049435 | 3 | 3.066536 |
| AT1G49560 | 0.001420 | 0.104520 | 0.132768 | 3 | 3.238709 |
| SOL1 | 0.000981 | 0.050847 | 0.036723 | 3 | 3.088552 |
| AT1G64530 | 0.000957 | 0.066384 | 0.022599 | 3 | 3.089940 |
| AT1G72740 | 0.000503 | 0.036723 | 0.046610 | 3 | 3.083837 |
| AT5G12850 | 0.000438 | 0.015537 | 0.057910 | 3 | 3.073884 |
| SYD | 0.000066 | 0.018362 | 0.021186 | 3 | 3.039614 |
| AT5G09460 | 0.000042 | 0.015537 | 0.022599 | 3 | 3.038178 |
| RSZ22a | 0.000024 | 0.009887 | 0.060734 | 3 | 3.070645 |
| AT1G58220 | 0.000012 | 0.009887 | 0.062147 | 3 | 3.072046 |
| CRF1 | 0.000006 | 0.028249 | 0.076271 | 3 | 3.104526 |
| MBF1A | 0.000004 | 0.042373 | 0.045198 | 3 | 3.087575 |
| bHLH104 | 0.000004 | 0.039548 | 0.062147 | 3 | 3.101699 |
| NAC045 | 0.000004 | 0.031073 | 0.040960 | 3 | 3.072038 |
| AT5G16470 | 0.000002 | 0.060734 | 0.091808 | 3 | 3.152544 |
| HMGB1 | 0.000002 | 0.053672 | 0.070621 | 3 | 3.124296 |
Search for individual genes¶
In [59]:
gene = 'SHR'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[59]:
| tf_occurance | end_degree_centrality | end_out_centrality | end_in_centrality | end_closeness_centrality | end_eigenvector_centrality | per_degree_centrality | per_out_centrality | per_in_centrality | per_betweenness_centrality | per_closeness_centrality | per_eigenvector_centrality | pro_degree_centrality | pro_out_centrality | pro_in_centrality | pro_closeness_centrality | pro_eigenvector_centrality | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| SHR | 1.0 | 0.007968 | 0.002988 | 0.00498 | 0.000243 | 0.006429 | 0.203833 | 0.055749 | 0.148084 | 0.000006 | 0.000635 | 0.053931 | 0.052109 | 0.027295 | 0.024814 | 0.000196 | 0.026289 |
In [60]:
gene = 'BLJ'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[60]:
| tf_occurance | end_degree_centrality | end_out_centrality | end_in_centrality | end_betweenness_centrality | end_closeness_centrality | end_eigenvector_centrality | |
|---|---|---|---|---|---|---|---|
| BLJ | 1.0 | 0.646414 | 0.302789 | 0.343625 | 0.079747 | 0.00042 | 0.124605 |
In [61]:
gene = 'JKD'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[61]:
| tf_occurance | cor_degree_centrality | cor_out_centrality | cor_in_centrality | cor_betweenness_centrality | cor_closeness_centrality | cor_eigenvector_centrality | end_degree_centrality | end_out_centrality | end_in_centrality | end_betweenness_centrality | end_closeness_centrality | end_eigenvector_centrality | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| JKD | 2.0 | 0.548077 | 0.282051 | 0.266026 | 0.003896 | 0.000308 | 0.119496 | 0.37251 | 0.130478 | 0.242032 | 0.017005 | 0.000413 | 0.100787 |
In [62]:
gene = 'RVN'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[62]:
| end_degree_centrality | end_out_centrality | end_in_centrality | end_closeness_centrality | end_eigenvector_centrality | |
|---|---|---|---|---|---|
| RVN | 0.065737 | 0.034861 | 0.030876 | 0.00035 | 0.033994 |
In [63]:
gene = 'BIB'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[63]:
| tf_occurance | end_degree_centrality | end_out_centrality | end_in_centrality | end_betweenness_centrality | end_closeness_centrality | end_eigenvector_centrality | |
|---|---|---|---|---|---|---|---|
| BIB | 1.0 | 0.128486 | 0.067729 | 0.060757 | 0.000007 | 0.000372 | 0.05411 |
In [64]:
gene = 'IME'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[64]:
In [65]:
gene = 'MYB66'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[65]:
In [66]:
gene = 'GL2'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
Out[66]:
| tf_occurance | atri_degree_centrality | atri_out_centrality | atri_in_centrality | atri_betweenness_centrality | atri_closeness_centrality | atri_eigenvector_centrality | lrc_degree_centrality | lrc_out_centrality | lrc_in_centrality | lrc_closeness_centrality | lrc_eigenvector_centrality | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| GL2 | 1.0 | 0.376147 | 0.149847 | 0.2263 | 0.760577 | 0.000623 | 0.090427 | 0.007776 | 0.006221 | 0.001555 | 0.000657 | 0.006107 |
In [67]:
tf_spec.to_csv('TF_GRN_centrality_t5-t7_zscore3.csv', index=True)
In [ ]: